Usenet 1993 July

home *** CD-ROM | disk | FTP | other *** search

/ Usenet 1993 July / InfoMagic USENET CD-ROM July 1993.ISO / sources / misc / volume9 / compress.ms < prev next >

Wrap

Text File | 1989-11-16 | 36.9 KB | 1,353 lines

Newsgroups: comp.sources.misc subject: v09i005: 16 bit compress for MSDOS From: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc) Reply-To: graham@tsmith.UUCP Posting-number: Volume 9, Issue 5 Submitted-by: graham@tsmith.UUCP Archive-name: compress.ms Recently, there have been people looking for source for compress.c to run under MSDOS. Here is one that may fit the bill. Doug. ------------------------------ Cut Here ------------------------------------ #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # The tool that generated this appeared in the comp.sources.unix newsgroup; # send mail to comp-sources-unix@uunet.uu.net if you want that tool. # If this archive is complete, you will see the following message at the end: # "End of shell archive." # Contents: README makefile compress.c # Wrapped by graham@tsmith on Wed Nov 15 20:52:00 1989 PATH=/bin:/usr/bin:/usr/ucb ; export PATH if test -f 'README' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'README'\" else echo shar: Extracting \"'README'\" $609 characters$ sed "s/^X//" >'README' <<'END_OF_FILE' XHere is a version of compress 4.0 hacked for MSDOS. A makefile is provided Xwhich will compile it using Microsoft C, Turbo C, or Zortech C. The makefile Xwill need editing if other than the Microsoft compiler is used. XThe program requires about 400K to run. It takes the same command line Xargs as does the UNIX program of the same name, and should be compatible Xin all ways with that program. It will decode a 16 bit compressed file, Xand can generate the same. On my machine, it decodes about twice as quickly Xas the "u16" decompress program posted earlier to c.s.m. X XDoug Graham. Xuunet!mitel!sce!tsmith!graham END_OF_FILE if test 609 -ne `wc -c <'README'`; then echo shar: \"'README'\" unpacked with wrong size! fi # end of 'README' fi if test -f 'makefile' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'makefile'\" else echo shar: Extracting \"'makefile'\" $1083 characters$ sed "s/^X//" >'makefile' <<'END_OF_FILE' X# X# Makefile for compress. X# X# If memory usage is a problem under DOS, you may want to do a X# X# "exemod compress.exe /MAX 0" X# X# in order to reduce the size of the near heap. If this is done on the X# Microsoft executable, memory requirements drop to about 380K from 410K X# Depending on how the other compilers manage their near/far heaps, this X# should have similar results there as well. X# X XDOSDEFS = -Di8088 -DMSDOS -DPROTO X X# X# Microsoft C 5.0 under MSDOS X# X# The resulting executable is faster by about 20% than either Turbo C, X# or Zortech C. X# Xcompress.exe: compress.c X cl -o compress.exe -W3 -Ox -DMSC $(DOSDEFS) compress.c X X# X# Turbo C 2.0 under MSDOS X# X# compress.exe: compress.c X# tcc -ecompress.exe -Z -O -G -w $(DOSDEFS) compress.c X X# X# Zortech C under MSDOS X# X# compress.exe: compress.c X# ztc -ocompress.exe -o $(DOSDEFS) compress.c X X# X# Sun OS 3.5. X# Compression is slightly slower than /usr/ucb/compress probably X# because the compiler is doing lots of "extl"'s. Decompression X# is slightly faster. X# X# compress: compress.c X# cc -O -DBSD4_2 -o compress compress.c END_OF_FILE if test 1083 -ne `wc -c <'makefile'`; then echo shar: \"'makefile'\" unpacked with wrong size! fi # end of 'makefile' fi if test -f 'compress.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'compress.c'\" else echo shar: Extracting \"'compress.c'\" $32700 characters$ sed "s/^X//" >'compress.c' <<'END_OF_FILE' X/* X * Compress - data compression program X */ Xstatic char rcs_ident[] = "@(#) compress,v 4.1 (DOS) 89/11/10 02:43:00 doug Release $"; X X/* X * compress.c - File compression ala IEEE Computer, June 1984. X * X * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) X * Jim McKie (decvax!mcvax!jim) X * Steve Davies (decvax!vax135!petsd!peora!srd) X * Ken Turkowski (decvax!decwrl!turtlevax!ken) X * James A. Woods (decvax!ihnp4!ames!jaw) X * Joe Orost (decvax!vax135!petsd!joe) X * Doug Graham (uunet!mitel!sce!tsmith!graham) X * X * Revision 4.1 (DOS) 89/11/10 02:43:00 doug X * Ported to MSDOS. Still works elsewhere, but maybe not as quickly. X * Removed as much long arithmetic as possible for speed on 16 bit machines. X * Use unsigned short's instead. Changed secondary hashing function to limit X * hash table size to 64K. This means table indexes can be 16 bit shorts. X * This compress will not generate codes from MAXMAXCODE (0xf000) thru X * 0xffff. Doesn't appear to hurt compression much. Removed speed hacks for X * other machines so I could understand the code. Added some for the i8088. X * Send CLEAR immediately when hash table fills instead of waiting for the X * compression ratio to drop. This is faster, and in some cases improves X * compression (but more often reduces it slightly). Junked the variable X * size hash table stuff because I am depending on 16 bit unsigned integer X * wrap around for indexing into hash table, so the table must have 2^16 X * entries. Took out the XENIX_16 stuff. The DOS way ought to work on Xenix X * as well, and should be faster, but I don't have access to Xenix in order X * to find out. Added some extra error checking on decompression to try to X * avoid blowing the machine out of the water when decompressing a corrupt X * file. Add "okunlink" to avoid the problem of losing the output file as X * well as the input file if ^C is hit at the wrong time. Lot's of other X * cosmetic changes. X * X * Revision 4.0 85/07/30 12:50:00 joe X * Removed ferror() calls in output routine on every output except first. X * Prepared for release to the world. X * X * Revision 3.6 85/07/04 01:22:21 joe X * Remove much wasted storage by overlaying hash table with the tables X * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM X * computations. Fixed dump_tab() DEBUG routine. X * X * Revision 3.5 85/06/30 20:47:21 jaw X * Change hash function to use exclusive-or. Rip out hash cache. These X * speedups render the megamemory version defunct, for now. Make decoder X * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply. X * X * Revision 3.4 85/06/27 12:00:00 ken X * Get rid of all floating-point calculations by doing all compression ratio X * calculations in fixed point. X * X * Revision 3.3 85/06/24 21:53:24 joe X * Incorporate portability suggestion for M_XENIX. Got rid of text on #else X * and #endif lines. Cleaned up #ifdefs for vax and interdata. X * X * Revision 3.2 85/06/06 21:53:24 jaw X * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list. X * Default to "quiet" output (no compression statistics). X * X * Revision 3.1 85/05/12 18:56:13 jaw X * Integrate decompress() stack speedups (from early pointer mods by McKie). X * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics X * of SVR2 'pack'. Streamline block-compress table clear logic. Increase X * output byte count by magic number size. X * X * Revision 3.0 84/11/27 11:50:00 petsd!joe X * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled X * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used X * unsigned compares on Perkin-Elmer. Fixed foreground check. X * X * Revision 2.7 84/11/16 19:35:39 ames!jaw X * Cache common hash codes based on input statistics; this improves X * performance for low-density raster images. Pass on #ifdef bundle X * from Turkowski. X * X * Revision 2.6 84/11/05 19:18:21 ames!jaw X * Vary size of hash tables to reduce time for small files. X * Tune PDP-11 hash function. X * X * Revision 2.5 84/10/30 20:15:14 ames!jaw X * Junk chaining; replace with the simpler (and, on the VAX, faster) X * double hashing, discussed within. Make block compression standard. X * X * Revision 2.4 84/10/16 11:11:11 ames!jaw X * Introduce adaptive reset for block compression, to boost the rate X * another several percent. (See mailing list notes.) X * X * Revision 2.3 84/09/22 22:00:00 petsd!joe X * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next. X * Bug fix for last bits. Changed fwrite to putchar loop everywhere. X * X * Revision 2.2 84/09/18 14:12:21 ames!jaw X * Fold in news changes, small machine typedef from thomas, X * #ifdef interdata from joe. X * X * Revision 2.1 84/09/10 12:34:56 ames!jaw X * Configured fast table lookup for 32-bit machines. X * This cuts user time in half for b <= FBITS, and is useful for news batching X * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and X * added signal catcher [plus beef in writeerr()] to delete effluvia. X * X * Revision 2.0 84/08/28 22:00:00 petsd!joe X * Add check for foreground before prompting user. Insert maxbits into X * compressed file. Force file being uncompressed to end with ".Z". X * Added "-c" flag and "zcat". Prepared for release. X * X * Revision 1.10 84/08/24 18:28:00 turtlevax!ken X * Will only compress regular files (no directories), added a magic number X * header (plus an undocumented -n flag to handle old files without headers), X * added -f flag to force overwriting of possibly existing destination file, X * otherwise the user is prompted for a response. Will tack on a .Z to a X * filename if it doesn't have one when decompressing. Will only replace X * file if it was compressed. X * X * Revision 1.9 84/08/16 17:28:00 turtlevax!ken X * Removed scanargs(), getopt(), added .Z extension and unlimited number of X * filenames to compress. Flags may be clustered (-Ddvb12) or separated X * (-D -d -v -b 12), or combination thereof. Modes and other status is X * copied with copystat(). -O bug for 4.2 seems to have disappeared with X * 1.8. X * X * Revision 1.8 84/08/09 23:15:00 joe X * Made it compatible with vax version, installed jim's fixes/enhancements X * X * Revision 1.6 84/08/01 22:08:00 joe X * Sped up algorithm significantly by sorting the compress chain. X * X * Revision 1.5 84/07/13 13:11:00 srd X * Added C version of vax asm routines. Changed structure to arrays to X * save much memory. Do unsigned compares where possible (faster on X * Perkin-Elmer) X * X * Revision 1.4 84/07/05 03:11:11 thomas X * Clean up the code a little and lint it. (Lint complains about all X * the regs used in the asm, but I'm not going to "fix" this.) X * X * Revision 1.3 84/07/05 02:06:54 thomas X * Minor fixes. X * X * Revision 1.2 84/07/05 00:27:27 thomas X * Add variable bit length output. X * X */ X X#include <stdio.h> X#include <ctype.h> X#include <signal.h> X#include <sys/types.h> X#include <sys/stat.h> X#ifndef __ZTC__ X#include <malloc.h> X#endif X#ifndef BSD4_2 X#include <stdlib.h> X#include <io.h> X#endif X#include <string.h> X#include <fcntl.h> X#ifdef MSDOS X#include <dos.h> X#endif X X#ifdef PROTO X/* X * Zortech appears to be missing this prototype, and MSC uses some X * silly structure as the second arg. Turbo C doesn't support this X * call at all. X */ Xextern int utime(char *path, time_t times[]); X#endif X X#define BITS 16 /* max number of bits/code */ X#define INIT_BITS 9 /* initial number of bits/code */ X X#define MAXCODE(n_bits) ((code_t)((1L << (n_bits)) - 1)) X X/* X * Magic numbers which should appear at the beginning of a compressed file. X */ X#define MAGIC0 0x1f X#define MAGIC1 0x9d X X/* X * Defines for third byte of header X */ X#define BIT_MASK 0x1f X#define BLOCK_MASK 0x80 X X#if 0 X#define CHECK_GAP 10000 /* ratio check interval */ X#endif X X/* X * the next two codes should not be changed lightly, as they must not X * lie within the contiguous general code space. X */ X#define FIRST 257 /* first free entry */ X#define CLEAR 256 /* table clear output code */ X X#define DE_STACKLEN 8192 /* Size of decoder stack */ X X#define HSIZE (1L << 16) /* Size of the hash table. Don't change this */ X Xtypedef unsigned char uchar; Xtypedef unsigned long ulong; Xtypedef unsigned short code_t; Xtypedef unsigned short hash_t; X X#ifdef PROTO X#define ARGS(x) x X#else X#define ARGS(x) () X#endif X Xvoid main ARGS((int argc, char **argv)); Xvoid Usage ARGS((void)); Xvoid version ARGS((void)); Xvoid compress ARGS((void)); Xvoid decompress ARGS((void)); Xvoid copystat ARGS((void)); Xvoid writeerr ARGS((void)); Xvoid cl_hash ARGS((void)); Xvoid putcode ARGS((code_t code)); Xvoid prratio ARGS((long num, long den)); Xint ofopen ARGS((char *filename)); Xint ifopen ARGS((char *filename)); Xint check_magic ARGS((void)); Xint need_clear ARGS((void)); Xvoid onintr ARGS(()); Xvoid oops ARGS(()); Xint taballoc ARGS((void)); Xvoid clearhash ARGS((void)); X X/* X * block compression parameters -- after all codes are used up, X * and compression rate changes, start over. X */ Xint block_compress = BLOCK_MASK; X Xint maxbits = BITS; /* user settable max # bits/code */ Xint magic = 1; /* 3-byte magic number header */ Xint zcat_flg = 0; /* Output on stdout */ Xint verbose = 0; /* don't tell me about compression */ Xint force = 0; /* Force overwrite of output file */ Xint do_decomp = 0; /* Decompress rather than compress. */ Xchar ofname[100]; /* Output file name */ Xint foreground; /* Running in foreground? */ Xint exit_stat = 0; /* Exit status */ Xuchar bitbuf[BITS+2]; /* For (dis)assembling code bytes */ Xint okunlink; /* OK for sig handler to unlink output file */ Xchar *ifname; X X#ifdef i8088 X Xuchar *de_stack; Xuchar far *charptr1; Xuchar far *codeptrs1[2]; Xuchar far *codeptrs2[2]; X X#define de_suffixof(i) charptr1[i] X#define de_prefixof(i) (*(code_t far *)&codeptrs1[i&1][i&~1]) X X#define en_hashchar(i) charptr1[i] X#define en_hashent(i) (*(code_t far *)&codeptrs1[i&1][i&~1]) X#define en_hashcode(i) (*(code_t far *)&codeptrs2[i&1][i&~1]) X X#ifndef MK_FP X#define MK_FP(seg, ofs) \ X ((void far *)(((ulong)(seg) << 16) | (unsigned)(ofs))) X#endif X X#define PARA 16 /* Size of a paragraph */ X X/* X * Return a segment address which is the segment part of the normalized X * version of "fp" rounded upwards. X * I use this on the far pointers returned by "farmalloc". While X * they are probably already normalized, I have never seen this X * stated anywhere in the doc's. X * X * There is a lot of junk below which would be unecessary if only X * there were a reasonably compiler independent way of allocating X * a given number of PARAGRAPHS (like TC's allocmem). I can't find X * one though. X */ X#define FP_SEGCEIL(fp) \ X (FP_SEG(fp) + (FP_OFF(fp) + PARA - 1)/PARA) X X/* X * Allocate space for the tables used in {en,de}coding. These tables X * reside in the far heap. It may seem inefficient to be using far pointers X * for the base of these tables, because the offset portion will always be zero. X * We could just keep the segment address of the base, and then do something X * like: X * *MK_FP(baseseg, offset) = blahblah; X * X * whenever we need to access the table. This SHOULD be more efficient, X * but the compilers do not appear to generate very efficient code in this X * case. Huge pointers are not used, because they are slow, and because X * Zortech does not support them. X */ X X#ifdef MSC X#define farmalloc(n) halloc(n, 1) X#endif X Xint taballoc() X{ X char far *X; X X if (do_decomp) { X if ((de_stack = malloc(DE_STACKLEN)) == 0) X return (0); X } X else { X if ((X = farmalloc((HSIZE + PARA) * sizeof(code_t))) == 0) X return (0); X codeptrs2[0] = MK_FP(FP_SEGCEIL(X), 0); X codeptrs2[1] = MK_FP(FP_SEGCEIL(X) + HSIZE/PARA, 0); X } X X if ((X = farmalloc((HSIZE + PARA) * sizeof(char))) == 0) X return (0); X charptr1 = MK_FP(FP_SEGCEIL(X), 0); X X if ((X = farmalloc((HSIZE + PARA) * sizeof(code_t))) == 0) X return (0); X codeptrs1[0] = MK_FP(FP_SEGCEIL(X), 0); X codeptrs1[1] = MK_FP(FP_SEGCEIL(X) + HSIZE/PARA, 0); X X return (1); X} X X#else X Xuchar chartab1[HSIZE]; Xcode_t codetab1[HSIZE]; Xcode_t codetab2[HSIZE]; X X#define de_suffixof(i) chartab1[i] X#define de_prefixof(i) codetab1[i] X#define de_stack (uchar *)codetab2 X X#define en_hashchar(i) chartab1[i] X#define en_hashent(i) codetab1[i] X#define en_hashcode(i) codetab2[i] X X#endif X Xvoid Usage() X{ X fprintf(stderr, "Usage: compress [-dfvcVnC] [-b maxbits] [file ...]\n"); X fprintf(stderr, " -V => print Version\n"); X fprintf(stderr, " -d => decompress\n"); X fprintf(stderr, " -v => verbose\n"); X fprintf(stderr, " -f => force overwrite of output file\n"); X fprintf(stderr, " -n => no header: useful to uncompress old files\n"); X fprintf(stderr, " -b maxbits => maxbits. Default %d\n", BITS); X fprintf(stderr, " -c => cat all output to stdout\n"); X fprintf(stderr, " -C => generate output compatible with compress 2.0.\n"); X} X X/***************************************************************** X * TAG( main ) X * X * Algorithm from "A Technique for High Performance Data Compression", X * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. X * X * Usage: compress [-dfvc] [-b bits] [file ...] X * Inputs: X * -d: If given, decompression is done instead. X * X * -c: Write output on stdout, don't remove original. X * X * -b: Parameter limits the max number of bits/code. X * X * -f: Forces output file to be generated, even if one already X * exists, and even if no space is saved by compressing. X * If -f is not used, the user will be prompted if stdin is X * a tty, otherwise, the output file will not be overwritten. X * X * -v: Write compression statistics X * X * file ...: Files to be compressed. If none specified, stdin X * is used. X * Outputs: X * file.Z: Compressed form of file with same mode, owner, and utimes X * or stdout (if stdin used as input) X * X * Assumptions: X * When filenames are given, replaces with the compressed version X * (.Z suffix) only if the file decreases in size. X * Algorithm: X * Modified Lempel-Ziv method (LZW). Basically finds common X * substrings and replaces them with a variable size code. This is X * deterministic, and can be done on the fly. Thus, the decompression X * procedure needs no input table, but tracks the way the table was built. X */ X X#ifdef __ZTC__ X#include <int.h> Xint silly_nonsense(struct INT_DATA *foo) {raise(SIGINT); return 1;} X#endif X X#define ARGVAL() (*++(*argv) || (--argc && *++argv)) X Xvoid main(argc, argv) Xint argc; Xchar **argv; X{ X char tempname[100], *cp; X X if (signal(SIGINT, SIG_IGN) != SIG_IGN) { X signal(SIGINT, onintr); X#ifdef __ZTC__ X /* X * The "signal" call above isn't good enough for Zortech X */ X int_intercept(0x23, silly_nonsense, 256); X#endif X#ifdef SIGSEGV X signal(SIGSEGV, oops); X#endif X if (isatty(2)) X foreground = 1; X } X X#ifndef MSDOS X if ((cp = strrchr(argv[0], '/')) != 0) X cp++; X else X cp = argv[0]; X#else X for (cp = argv[0]; *cp; cp++) X if (*cp == '/' || *cp == '\\') X argv[0] = cp + 1; X cp = strlwr(argv[0]); X#endif X /* Limited to 8 char filenames under DOS */ X if (strncmp(cp, "uncompress", 8) == 0) X do_decomp = 1; X else if (strncmp(cp, "zcat", 4) == 0) { X do_decomp = 1; X zcat_flg = 1; X } X X#ifdef BSD4_2 X /* 4.2BSD dependent - take it out if not */ X setlinebuf(stderr); X#endif /* BSD4_2 */ X X for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) { X while (*++(*argv)) { /* Process all flags in this arg */ X switch (**argv) { X case 'V': X version(); X break; X case 'v': X verbose = 1; X break; X case 'd': X do_decomp = 1; X break; X case 'f': X case 'F': X force = 1; X break; X case 'n': X magic = 0; X break; X case 'C': X block_compress = 0; X break; X case 'b': X if (!ARGVAL()) { X fprintf(stderr, "Missing maxbits\n"); X Usage(); X exit(1); X } X maxbits = atoi(*argv); X goto nextarg; X case 'c': X zcat_flg = 1; X break; X case 'q': X verbose = 0; X break; X default: X fprintf(stderr, "Unknown flag: '%c'; ", **argv); X Usage(); X exit(1); X } X } Xnextarg:; X } X X#ifdef i8088 X if (! taballoc()) { X fprintf(stderr, "compress: out of memory\n"); X exit(1); X } X#endif X /* X * If no filename args, do standard input. X */ X if (argc <= 0) { X if (! ifopen((char *)0) || ! ofopen((char *)0)) X exit(1); X X ifname = "stdin"; X X if (do_decomp) { X if (!check_magic()) X exit(1); X decompress(); X } X else { X compress(); X if (verbose) X putc('\n', stderr); X } X exit(exit_stat); X } X X while (--argc >= 0) { X char *suf; X X ifname = *argv++; X suf = strrchr(ifname, '.'); X X exit_stat = 0; X okunlink = 0; X X if (do_decomp) { /* DECOMPRESSION */ X if (!suf || (strcmp(suf, ".Z") && strcmp(suf, ".z"))) { X strcpy(tempname, ifname); X strcat(tempname, ".Z"); X ifname = tempname; X } X if (! ifopen(ifname) || !check_magic()) X continue; X if (zcat_flg) X ofname[0] = '\0'; X else { X strcpy(ofname, ifname); X ofname[strlen(ifname) - 2] = '\0'; X } X if (!ofopen(ofname)) X continue; X if (!zcat_flg && verbose) X fprintf(stderr, "%s: ", ifname); X decompress(); X } X else { /* COMPRESSION */ X if (suf && (!strcmp(suf, ".Z") || !strcmp(suf, ".z"))) { X fprintf(stderr, "%s: already has .Z suffix -- no change\n", X ifname); X continue; X } X if (! ifopen(ifname)) X continue; X if (zcat_flg) X ofname[0] = 0; X else { X strcpy(ofname, ifname); X#ifndef MSDOS /* We'll let ofopen do the complaining */ X#ifndef BSD4_2 X if ((cp = strrchr(ofname, '/')) != NULL) X cp++; X else X cp = ofname; X if (strlen(cp) > 12) { X fprintf(stderr,"%s: filename too long to tack on .Z\n",cp); X continue; X } X#endif X#endif X strcat(ofname, ".Z"); X } X if (! ofopen(ofname)) X continue; X if (! zcat_flg && verbose) X fprintf(stderr, "%s: ", ifname); X compress(); X } X X if (! zcat_flg) { X copystat(); X if ((exit_stat == 1) || verbose) X putc('\n', stderr); X } X } X exit(exit_stat); X} X X/* X * compress stdin to stdout X * X * Algorithm: use open addressing double hashing (no chaining) on the X * prefix code / next character combination. We do a variant of Knuth's X * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime X * secondary probe. Here, the modular division first probe is gives way X * to a faster exclusive-or manipulation. Also do block compression with X * an adaptive reset, whereby the code table is cleared when the compression X * ratio decreases, but after the table fills. The variable-length output X * codes are re-sized at this point, and a special CLEAR code is generated X * for the decompressor. Late addition: construct the table according to X * file size for noticeable speed improvement on small files. Please direct X * questions about this implementation to ames!jaw. X * X * Secondary hash function changed slightly for DOS. Hash table used to be X * > 64K. This is slow on a 16 bit machine because it means long arithmetic, X * and more complicated addressing of tables in the far address space. X * We now restrict the table size to 64K, and, so that the table does X * not overfill, restrict the codes that we will generate to MAXMAXCODE. X * This causes slightly poorer compression in some cases, but, interestingly X * enough, also causes better compression ratios in certain other cases. X * Yes, this is all compatible with other compresses. X */ Xstatic long in_count; /* length of input */ Xstatic long out_count; /* length of compressed output */ Xstatic long ratio; /* in_count/out_count * 256 */ Xstatic int n_bits; /* number of bits/code */ Xstatic int n_bits8; /* bits/code times 8 */ Xstatic int bitoffset; /* Offset into bitbuf */ X X#define NOENT ((code_t)0xffff) X#define MAXMAXCODE ((code_t)0xf000) X X/* X * Clear out the hash table. We try to do this as quickly as possible, because X * it's running time dominates for small files. For big files, it doesn't matter X * much because it doesn't get called often. Now I understand why the original X * had a variable size hash table. X */ Xvoid clearhash() X{ X#ifdef i8088 X register unsigned i; X code_t far *hp; X X hp = (code_t far *)codeptrs1[0]; X i = (unsigned)(HSIZE/2); X do X *hp++ = NOENT; X while (--i > 0); X X hp = (code_t far *)codeptrs1[1]; X i = (unsigned)(HSIZE/2); X do X *hp++ = NOENT; X while (--i > 0); X#else X /* X * WARNING: assumes that NOENT == 0xffff X */ X memset((char *)codetab1, 0xff, HSIZE*sizeof(code_t)); X#endif X} X X/* X * Compress stdin to stdout. X */ Xvoid compress() X{ X register hash_t i; X register code_t ent; X hash_t disp; X int c; X code_t freecode; /* first unused entry */ X code_t maxcode; /* maximum code, given n_bits */ X code_t maxmaxcode; X code_t k; X#ifdef CHECK_GAP X long checkpoint = 0; X#endif X X if (maxbits < INIT_BITS) X maxbits = INIT_BITS; X if (maxbits > BITS) X maxbits = BITS; X X if (magic) { X putchar(MAGIC0); putchar(MAGIC1); X putchar(maxbits | block_compress); X if (ferror(stdout)) X writeerr(); X } X X bitbuf[bitoffset = 0] = 0; X out_count = 3; /* includes 3-byte header mojo */ X ratio = 0; X in_count = 1; X X n_bits = INIT_BITS; X n_bits8 = INIT_BITS << 3; X maxcode = MAXCODE(INIT_BITS); X maxmaxcode = MAXCODE(maxbits); X if (maxmaxcode > MAXMAXCODE) X maxmaxcode = MAXMAXCODE; X X freecode = ((block_compress) ? FIRST : 256); X X clearhash(); X X ent = getchar(); X X while ((c = getchar()) != EOF) { X in_count++; X X i = (hash_t)(c << 8) ^ ent; /* xor hashing */ X X if ((k = en_hashent(i)) == ent && en_hashchar(i) == (uchar)c) { X ent = en_hashcode(i); X goto Continue; X } X X if (k != NOENT) { X /* X * New secondary hash for 64K table. X * Experiment shows that the shift by 6 works well. X * Beats me why. "disp" must be relatively X * prime to the table size. Since the table size is a X * power of 2, this means "disp" must be odd. X * X * Note that we do not do a range check before doing X * "i -= disp". It is assumed that the hash table size X * (HSIZE) is 64K, and that the type "hash_t" (which X * is unsigned short) is 16 bits. Thus it is impossible X * for "i" to be out of range. On a machine with something X * other than 16 bit shorts, this would have to change. X */ X disp = ((hash_t)(c << 6) ^ ent) | 1; X do { X i -= disp; X if ((k = en_hashent(i)) == ent && X en_hashchar(i) == (uchar)c) { X ent = en_hashcode(i); X goto Continue; X } X } while (k != NOENT); X } X X putcode(ent); X X if (freecode <= maxmaxcode) { X /* X * Add the new entry. X */ X en_hashchar(i) = (uchar)c; X en_hashent(i) = ent; X en_hashcode(i) = freecode; X X /* X * If the next entry is going to be too big for the X * code size, then increase it, if possible. X */ X if (freecode++ > maxcode) { X while (bitoffset) X putcode(0); X ++n_bits; X n_bits8 += 8; X maxcode = MAXCODE(n_bits); X } X } X#ifdef CHECK_GAP X else if (in_count >= checkpoint && block_compress) { X checkpoint = in_count + CHECK_GAP; X if (need_clear()) { X#else X else if (block_compress) { X if (1) { X#endif X putcode(CLEAR); X while (bitoffset > 0) X putcode(0); X clearhash(); X freecode = FIRST; X maxcode = MAXCODE(INIT_BITS); X n_bits = INIT_BITS; X n_bits8 = n_bits << 3; X } X } X ent = c; XContinue:; X } X /* X * Put out the final code. X */ X putcode(ent); X X /* X * At EOF, write the rest of the buffer. X */ X if (bitoffset > 0) X fwrite(bitbuf, 1, (bitoffset + 7) / 8, stdout); X out_count += (bitoffset + 7) / 8; X fflush(stdout); X if (ferror(stdout)) X writeerr(); X X /* X * Print out stats on stderr X */ X if (! zcat_flg && verbose) { X fprintf(stderr, "Compression: "); X prratio(in_count - out_count, in_count); X } X if (out_count > in_count) /* exit(2) if no savings */ X exit_stat = 2; X} X X/* X * Output the given code. Assumes that chars are 8 bits. X * "n_bits" output bytes (containing 8 codes) are assembled X * in in "bitbuf", and then written out. X */ Xvoid putcode(code) Xcode_t code; X{ X register int i; X register uchar *bp; X X bp = &bitbuf[(bitoffset >> 3)]; X i = bitoffset & 7; X bp[0] |= (uchar)(code << i); X bp[1] = (uchar)(code >>= (8 - i)); X bp[2] = (uchar)(code >> 8); X X if ((bitoffset += n_bits) == n_bits8) { X bp = bitbuf; X i = n_bits; X out_count += i; X do X putchar(*bp++); X while (--i); X bitbuf[bitoffset = 0] = 0; X } X} X X#ifdef CHECK_GAP X/* X * Compute the current compression ratio, and return non-zero if X * it is has decreased since the last we checked. X * X * Don't use this anymore. Whenever the hash table fills, X * we send a CLEAR immediately (if block_compress). This is faster, X * and doesn't appear to affect the compression ratio much. X */ Xint need_clear() X{ X long rat; X X if (in_count > 0x007fffffL) { /* shift will overflow */ X rat = out_count >> 8; X if (rat == 0) /* Don't divide by zero */ X rat = 0x7fffffffL; X else X rat = in_count / rat; X } else X rat = (in_count << 8) / out_count; X X if (rat > ratio) { X ratio = rat; X return (0); X } X else { X ratio = 0; X return (1); X } X} X#endif X X/* X * Decompress stdin to stdout. This code assumes that chars are 8 bits. X */ Xvoid decompress() X{ X register uchar *stackp; X register code_t code; X code_t oldcode, incode; X code_t codemask; X code_t freecode; /* first unused entry */ X code_t maxcode; /* maximum code, given n_bits */ X code_t maxmaxcode; X int finchar; X int size; /* #bits in bitbuf */ X int bitoff; /* Offset into bitbuf */ X int n_bits; /* number of bits/code */ X#ifndef i8088 X register uchar *bp; X#endif X X n_bits = INIT_BITS; X maxcode = MAXCODE(INIT_BITS) - 1; X codemask = MAXCODE(INIT_BITS); X freecode = ((block_compress) ? FIRST : 256) - 1; X maxmaxcode = MAXCODE(maxbits); X X /* X * Read the first code into "oldcode" X */ X if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0) X return; X size = (size << 3) - (n_bits - 1); X oldcode = (bitbuf[0] | (bitbuf[1] << 8)) & codemask; X bitoff = n_bits; X X /* X * First code must be 8 bits == char. Write it, and die X * if it can't be written. X */ X putchar(finchar = oldcode); X if (ferror(stdout)) X writeerr(); X X stackp = de_stack; X X for ( ; ; ) { X if (bitoff >= size) { X if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0) X break; X /* Round size down to integral number of codes */ X size = (size << 3) - (n_bits - 1); X bitoff = 0; X } X /* X * Read the next code into "code". On the 8088, X * a slight speedup is possible because it has the right byte X * order, and no alignment restrictions. X */ X#ifdef i8088 X code = ((code_t)(*(long *)&bitbuf[(bitoff >> 3)] >> X (bitoff&7))) & codemask; X#else X bp = &bitbuf[(bitoff >> 3)]; X code = (code_t)(((bp[0] | (code_t)bp[1] << 8) | X (ulong)bp[2] << 16) >> (bitoff & 7)) & codemask; X#endif X bitoff += n_bits; X X if ((code == CLEAR) && block_compress) { X n_bits = INIT_BITS; X maxcode = MAXCODE(INIT_BITS) - 1; X codemask = MAXCODE(INIT_BITS); X freecode = (FIRST - 1) - 1; X size = 0; X continue; X } X incode = code; X X /* X * Special case for KwKwK string. X */ X if (code > freecode) { X if (code != freecode + 1) X oops(); X *stackp++ = (uchar)finchar; X code = oldcode; X } X X /* X * Generate output characters in reverse order X */ X while (code >= 256) { X *stackp++ = de_suffixof(code); X code = de_prefixof(code); X } X X /* X * And write them out in the forward order. X */ X putchar(finchar = code); X for (code = (stackp - de_stack) + 1; --code != 0; ) X putchar(*--stackp); X X /* X * Generate the new entry. X */ X if (freecode < maxmaxcode) { X if (++freecode > maxcode) { X if (++n_bits == maxbits) X maxcode = maxmaxcode; X else X maxcode = MAXCODE(n_bits) - 1; X size = 0; X codemask = MAXCODE(n_bits); X } X de_prefixof(freecode) = oldcode; X de_suffixof(freecode) = (uchar)finchar; X } X /* X * Remember previous code. X */ X oldcode = incode; X } X fflush(stdout); X if (ferror(stdout)) X writeerr(); X} X X/* X * Check a compressed file to make sure it has the proper magic number X * at the beginning. Also read the third byte to determine "maxbits", X * and "block_compress". X */ Xint check_magic() X{ X if (! magic) X return (1); X if ((getchar() != MAGIC0) || (getchar() != MAGIC1)) { X fprintf(stderr, "%s: not in compressed format\n", ifname); X return (0); X } X maxbits = getchar(); /* set -b from file */ X block_compress = maxbits & BLOCK_MASK; X maxbits &= BIT_MASK; X if (maxbits > BITS) { X fprintf(stderr, X "%s: compressed with %d bits, can only handle %d bits\n", X ifname, maxbits, BITS); X return (0); X } X return (1); X} X Xvoid writeerr() X{ X perror(ofname); X fclose(stdout); X unlink(ofname); X exit(1); X} X X/* X * Copy the permissions and file times from the input file to the X * output. X */ Xvoid copystat() X{ X struct stat statbuf; X int mode; X void (* ss)(); X#ifndef __TURBOC__ X time_t timep[2]; X#else X struct ftime filetime; X int fd; X#endif X X fclose(stdout); X if (stat(ifname, &statbuf)) { /* Get stat on input file */ X perror(ifname); X return; X } X if ((statbuf.st_mode & S_IFMT) != S_IFREG) { X if (! verbose) X fprintf(stderr, "%s: ", ifname); X fprintf(stderr, " -- not a regular file: unchanged"); X exit_stat = 1; X } X else if (statbuf.st_nlink > 1) { X if (! verbose) X fprintf(stderr, "%s: ", ifname); X fprintf(stderr, " -- has %d other links: unchanged", X statbuf.st_nlink - 1); X exit_stat = 1; X } X else if (exit_stat == 2 && !force) { /* No compression: remove file.Z */ X if (verbose) X fprintf(stderr, " -- file unchanged"); X } X else { /* ***** Successful Compression ***** */ X exit_stat = 0; X mode = statbuf.st_mode & 07777; X#ifndef __ZTC__ X if (chmod(ofname, mode)) /* Copy modes */ X perror(ofname); X#endif X#ifndef MSDOS X chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ X#endif X#ifndef __TURBOC__ X timep[0] = statbuf.st_atime; X timep[1] = statbuf.st_mtime; X utime(ofname, timep); X#else X if ((fd = open(ofname, O_RDONLY)) >= 0) { X if (getftime(fileno(stdin), &filetime) == 0) X setftime(fd, &filetime); X close(fd); X } X#endif X fclose(stdin); X ss = signal(SIGINT, SIG_IGN); X okunlink = 0; X /* ^C here would leave both input, and output files around */ X if (unlink(ifname)) /* Remove input file */ X perror(ifname); X signal(SIGINT, ss); X if (verbose) X fprintf(stderr, " -- replaced with %s", ofname); X return; /* Successful return */ X } X X /* Unsuccessful return -- one of the tests failed */ X X if (unlink(ofname)) X perror(ofname); X} X Xvoid onintr() X{ X fclose(stdout); X if (okunlink) X unlink(ofname); X exit(1); X} X Xvoid oops() /* wild pointer -- assume bad input */ X{ X if (do_decomp) X fprintf (stderr, "uncompress: %s is corrupt.\n", ifname); X fclose(stdout); X if (okunlink) X unlink(ofname); X exit(1); X} X Xvoid prratio(num, den) Xlong int num, den; X{ X register int q; /* Doesn't need to be long */ X X if (num > 214748L) /* 2147483647/10000 */ X q = (int)(num / (den / 10000L)); X else X q = (int)(10000L * num / den); /* Long calculations, though */ X if (q < 0) { X putc('-', stderr); X q = -q; X } X fprintf(stderr, "%d.%02d%%", q / 100, q % 100); X} X Xvoid version() X{ X fprintf(stderr, "%s\n", rcs_ident); X fprintf(stderr, "BITS = %d\n", BITS); X} X X/* X * Open the file "ofname" for binary output with possible check X * for overwrite. If all goes well, return non-zero, else zero. X */ Xint ofopen(filename) Xchar *filename; X{ X static char IOoutbuf[8192]; X struct stat statbuf; X X if (filename && !*filename) X filename = 0; X X /* X * Check for overwrite of existing file X */ X if (filename && !force && stat(filename, &statbuf) == 0) { X char response[2]; X response[0] = 'n'; X fprintf(stderr, "%s already exists;", filename); X if (foreground) { X fprintf(stderr, " do you wish to overwrite %s (y or n)? ", filename); X fflush(stderr); X read(2, response, 2); X while (response[1] != '\n') { X if (read(2, response+1, 1) < 0) { /* Ack! */ X perror("stderr"); X break; X } X } X } X if (response[0] != 'y') { X fprintf(stderr, "\tnot overwritten\n"); X return (0); X } X } X X okunlink = 1; X /* X * Open the output file. X */ X if (filename && !freopen(filename, "wb", stdout)) { X perror(filename); X return (0); X } X#ifdef O_BINARY X setmode(fileno(stdout), O_BINARY); X#else X#ifdef __ZTC__ X /* X * I'm sure there must be a better way in Zortech C to change the X * mode of an already opened file, but I can't find it. It doesn't X * have a "setmode" call it seems. X */ X stdout->_flag &= ~_IOTRAN; X#endif X#endif X setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf)); X return (1); X} X Xifopen(filename) Xchar *filename; X{ X static char IOinbuf[8192]; X X if (filename && !freopen(filename, "rb", stdin)) { X perror(filename); X return (0); X } X#ifdef O_BINARY X setmode(fileno(stdin), O_BINARY); X#else X#ifdef __ZTC__ X stdin->_flag &= ~_IOTRAN; X#endif X#endif X setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf)); X return (1); X} END_OF_FILE if test 32700 -ne `wc -c <'compress.c'`; then echo shar: \"'compress.c'\" unpacked with wrong size! fi # end of 'compress.c' fi echo shar: End of shell archive. exit 0